We saw the square-root function in Chapter 10; it is just one instance of a power-function.
Why did we observe a decrease of the accuracy of the NN prediction of the square-root outside the interval \([0,1]\) (note we trained inside \([0,1]\))? How can you improve on the prediction of the square-root network?
Can you design a more generic NN network that can learn and predict a power-function for a given power parameter \(λ∈R\)?
# Training pairs: 1000 uniform draws on [0, 100] and their square roots.
# Note: abs() is redundant here — runif(n, 0, 100) is already non-negative.
rand_data <- abs(runif(1000, 0, 100))
sqrt_df <- data.frame(rand_data, sqrt_data=sqrt(rand_data))
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
# Reference plot of y = sqrt(x) on the training input range [0, 100].
s <- seq(from=0, to=100, length.out=1000)
plot_ly(x = ~s, y = ~sqrt(s), type="scatter", mode = "lines") %>%
  layout(title='Square-root Function',  # fixed typo: was "Funciton"
    xaxis = list(title="Input (x)", scaleanchor="y"),
    yaxis = list(title="Output (y=sqrt(x))", scaleanchor="x"),
    legend = list(orientation = 'h'))
# Fit a single-hidden-layer NN (10 units) to learn sqrt from the
# [0, 100] training sample generated above.
set.seed(1234)
net.sqrt <- neuralnet::neuralnet(sqrt_data ~ rand_data, sqrt_df, hidden=10, threshold=0.1)
# Evaluate on [0, 200]: the sub-interval (100, 200] probes extrapolation
# beyond the training range, where accuracy is expected to degrade.
N <- 200 # out of range [100: 200] is also included in the testing!
test_data <- seq(0, N, 0.1); test_data_sqrt <- sqrt(test_data)
test_data.df <- data.frame(rand_data=test_data, sqrt_data=sqrt(test_data));
pred_sqrt <- predict(net.sqrt, test_data.df)
plot_ly(x = ~pred_sqrt[,1], y = ~test_data_sqrt, type = "scatter", mode="markers", name="scatter") %>%
add_trace(x = c(0,14), y = c(0,14), mode="lines", line = list(width = 4), name="Ideal Agreement") %>%
layout(title='Scatterplot Predicted vs. Actual SQRT',
xaxis = list(title="NN Predicted", scaleanchor="y"),
yaxis = list(title="Actual Value (y=sqrt(x))", scaleanchor="x"),
legend = list(orientation = 'h'))
compare_df <-data.frame(pred_sqrt, test_data_sqrt); # compare_df
plot_ly(x = ~test_data, y = ~test_data_sqrt, type="scatter", mode="lines", name="SQRT") %>%
add_trace(x = ~test_data, y = ~pred_sqrt, mode="markers", name="NN Model Prediction") %>%
layout(title='Predicted vs. Actual SQRT',
xaxis = list(title="Inputs"),
yaxis = list(title="Outputs (y=sqrt(x))"),
legend = list(orientation = 'h'))
Use the SOCR Normal and Schizophrenia pediatric neuroimaging study data to complete the following tasks:
Conduct some initial data visualization and exploration
library(rvest)
wiki_url <- read_html("https://wiki.socr.umich.edu/index.php/SOCR_Data_Oct2009_ID_NI")
html_nodes(wiki_url, "#content")
## {xml_nodeset (1)}
## [1] <div id="content" class="mw-body" role="main">\n\t\t\t<a id="top"></a>\n\ ...
sch <- html_table(html_nodes(wiki_url, "table")[[1]])
sch <-sch[, -1]
str(sch)
## tibble [63 × 65] (S3: tbl_df/tbl/data.frame)
## $ Age : int [1:63] 14 15 14 15 15 15 14 10 15 19 ...
## $ DX : int [1:63] 1 1 1 1 1 1 1 1 1 1 ...
## $ Sex : int [1:63] 1 1 2 1 2 1 1 1 2 1 ...
## $ FS_IQ : int [1:63] 118 116 107 107 107 100 94 114 107 95 ...
## $ TBV : int [1:63] 1663407 1583940 1299470 1535137 1431890 1578698 1453510 1650348 1288971 1366346 ...
## $ GMV : int [1:63] 654981 527918 550035 599372 593306 822056 534556 608916 479707 699476 ...
## $ WMV : int [1:63] 840874 851560 614598 780127 695296 546395 730155 860876 668576 479998 ...
## $ CSF : int [1:63] 167553 204463 134837 155638 143288 210247 188800 180555 140688 186872 ...
## $ Background : int [1:63] 8362251 8586107 8627510 8607550 8617899 8593510 8624978 8631722 8629619 8624830 ...
## $ L_superior_frontal_gyrus : int [1:63] 67276 19866 15450 15483 18538 16515 17285 18709 18248 16847 ...
## $ R_superior_frontal_gyrus : int [1:63] 66079 17346 14219 13942 17094 15956 15062 20009 15918 15280 ...
## $ L_middle_frontal_gyrus : int [1:63] 63661 11056 8647 11253 10254 8550 8461 8386 8063 9105 ...
## $ R_middle_frontal_gyrus : int [1:63] 67684 7676 6463 9433 7201 5537 6283 7556 5728 4614 ...
## $ L_inferior_frontal_gyrus : int [1:63] 29368 6050 6370 6636 6980 6531 4791 9224 6346 5690 ...
## $ R_inferior_frontal_gyrus : int [1:63] 25693 5125 4773 4764 5692 4788 3742 5770 4339 3872 ...
## $ L_precentral_gyrus : int [1:63] 30471 1352 1308 1280 1095 1202 1199 1260 1215 1280 ...
## $ R_precentral_gyrus : int [1:63] 34528 1837 1504 1681 1607 1111 1584 1592 1123 1524 ...
## $ L_middle_orbitofrontal_gyrus : int [1:63] 8294 747 528 778 804 754 784 796 663 558 ...
## $ R_middle_orbitofrontal_gyrus : int [1:63] 9723 534 401 586 437 538 508 440 428 352 ...
## $ L_lateral_orbitofrontal_gyrus: int [1:63] 6816 608 594 750 759 721 562 889 654 571 ...
## $ R_lateral_orbitofrontal_gyrus: int [1:63] 4960 41 34 45 32 41 25 51 44 29 ...
## $ L_gyrus_rectus : int [1:63] 2180 329 277 227 335 311 247 297 321 270 ...
## $ R_gyrus_rectus : int [1:63] 2459 299 286 295 320 479 265 357 334 339 ...
## $ L_postcentral_gyrus : int [1:63] 23126 167 164 184 159 232 156 207 191 188 ...
## $ R_postcentral_gyrus : int [1:63] 20692 408 288 271 304 300 284 285 266 278 ...
## $ L_superior_parietal_gyrus : int [1:63] 22016 3851 2771 2531 3089 2928 2469 3643 2846 2962 ...
## $ R_superior_parietal_gyrus : int [1:63] 24604 3090 3415 3481 3273 3967 2377 3448 3247 2521 ...
## $ L_supramarginal_gyrus : int [1:63] 15130 1492 1177 1270 1017 1227 902 1562 1083 941 ...
## $ R_supramarginal_gyrus : int [1:63] 15955 1577 1116 1402 1559 2087 1111 1370 1482 1451 ...
## $ L_angular_gyrus : int [1:63] 17463 477 312 370 355 435 332 444 321 341 ...
## $ R_angular_gyrus : int [1:63] 20257 2430 1804 1938 2154 2724 1622 2117 2119 2243 ...
## $ L_precuneus : int [1:63] 14977 1724 1120 827 1103 1200 1071 1337 1162 1159 ...
## $ R_precuneus : int [1:63] 14054 1596 1161 706 1251 1327 1077 1297 1097 1052 ...
## $ L_superior_occipital_gyrus : int [1:63] 6550 25 30 34 26 34 25 38 26 30 ...
## $ R_superior_occipital_gyrus : int [1:63] 6878 55 52 56 73 77 72 63 59 70 ...
## $ L_middle_occipital_gyrus : int [1:63] 19487 1151 931 1125 1062 1152 982 1153 951 953 ...
## $ R_middle_occipital_gyrus : int [1:63] 21535 1715 1403 1701 1572 1831 1483 1794 1429 1488 ...
## $ L_inferior_occipital_gyrus : int [1:63] 8896 588 469 537 525 602 488 575 474 499 ...
## $ R_inferior_occipital_gyrus : int [1:63] 12041 1244 1001 1178 1120 1272 1079 1191 1013 1038 ...
## $ L_cuneus : int [1:63] 5631 191 173 218 234 261 231 220 160 227 ...
## $ R_cuneus : int [1:63] 7386 372 319 341 358 361 328 396 339 305 ...
## $ L_superior_temporal_gyrus : int [1:63] 31923 4118 3153 3534 3803 3045 3730 3554 3432 3674 ...
## $ R_superior_temporal_gyrus : int [1:63] 30222 9923 8239 9404 9395 9372 8868 9583 8230 9363 ...
## $ L_middle_temporal_gyrus : int [1:63] 30234 2175 1758 2307 2269 1839 1910 2228 2100 1885 ...
## $ R_middle_temporal_gyrus : int [1:63] 30419 2294 1864 2060 2146 1500 1824 2107 1810 1752 ...
## $ L_inferior_temporal_gyrus : int [1:63] 23116 715 528 654 735 601 598 579 665 575 ...
## $ R_inferior_temporal_gyrus : int [1:63] 27490 514 403 552 515 420 436 505 461 380 ...
## $ L_parahippocampal_gyrus : int [1:63] 6772 946 724 957 842 815 801 712 776 842 ...
## $ R_parahippocampal_gyrus : int [1:63] 6461 704 633 706 720 744 681 710 618 692 ...
## $ L_lingual_gyrus : int [1:63] 15609 1863 1379 1612 1721 1476 1456 1307 1423 1435 ...
## $ R_lingual_gyrus : int [1:63] 18191 2035 1801 2024 1902 2244 1626 1446 1797 1573 ...
## $ L_fusiform_gyrus : int [1:63] 17755 856 773 841 790 835 956 628 763 730 ...
## $ R_fusiform_gyrus : int [1:63] 14817 1634 1268 1277 1167 1541 1547 1000 1102 1467 ...
## $ L_insular_cortex : int [1:63] 10054 4524 3884 4080 4317 5076 4082 5077 4143 4201 ...
## $ R_insular_cortex : int [1:63] 9670 1210 1034 1141 1134 1424 1189 1444 1096 948 ...
## $ L_cingulate_gyrus : int [1:63] 16570 2085 1503 1736 1602 1813 1835 1692 1645 1657 ...
## $ R_cingulate_gyrus : int [1:63] 16524 2596 1969 2009 2063 2244 2446 2551 2215 2460 ...
## $ L_caudate : int [1:63] 1567 142 143 148 159 139 198 142 145 155 ...
## $ R_caudate : int [1:63] 2391 396 290 353 311 343 313 333 273 400 ...
## $ L_putamen : int [1:63] 4556 5827 4789 4547 5003 6233 4769 5512 5018 5093 ...
## $ R_putamen : int [1:63] 2474 1042 938 907 906 935 802 1011 882 743 ...
## $ L_hippocampus : int [1:63] 5134 860 665 892 754 817 752 840 848 749 ...
## $ R_hippocampus : int [1:63] 4742 535 457 551 530 564 551 661 464 489 ...
## $ cerebellum : int [1:63] 168778 103469 89234 93754 79980 109382 90110 63340 83317 88166 ...
## $ brainstem : int [1:63] 31055 15778 13864 18444 16316 15397 14017 12202 12830 15030 ...
library(tidyr)
library(reshape2)
library(ggplot2)
d <- melt(sch[,c(1,4:9)])
## No id variables; using all as measure variables
ggplot(d,aes(value)) +
facet_wrap(~variable,scales = "free_x") +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
sch2<-sch[,2:3]
sch2$Sex<-as.factor(sch2$Sex)
sch2$DX<-as.factor(sch2$DX)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sch2$Sex<- recode(sch2$Sex, "1"="Male","2"="Female")
sch2$DX<- recode(sch2$DX, "1"="Normal","2"="Schizophrenia")
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.2.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Side-by-side bar charts of the Sex and DX factor counts.
# geom_bar() is the correct geom for counting a discrete variable;
# the original geom_histogram(stat="count") is just geom_bar() in disguise
# and produced the "Ignoring unknown parameters" warnings seen above.
plot1 <- ggplot(sch2, aes(Sex)) +
  geom_bar()
plot2 <- ggplot(sch2, aes(DX)) +
  geom_bar()
grid.arrange(plot1, plot2, ncol=2)
Use derived neuroimaging biomarkers (e.g., Age,
FS_IQ, TBV, GMV, WMV, CSF,
Background, L_superior_frontal_gyrus,
R_superior_frontal_gyrus, …, brainstem) to train a
NN model and predict DX (Normals=1;
Schizophrenia=2)
# Min-max rescale a numeric vector to [0, 1].
#
# @param x     numeric vector
# @param na.rm remove NAs before computing the range? Default FALSE keeps
#              the original behavior (any NA makes the whole result NA).
# @return numeric vector the same length as x
#
# Fix: a constant vector previously produced 0/0 = NaN for every element,
# which would silently break downstream model fitting; it now maps to 0
# (NA positions are preserved).
normalize <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  if (!is.na(span) && span == 0) {
    # constant input: every non-NA element equals rng[1], so this is all 0s
    return(x - rng[1])
  }
  (x - rng[1]) / span
}
# Min-max normalize every column (DX and Sex both map from 1/2 to 0/1).
schn<-as.data.frame(lapply(sch, normalize))
# 75/25 train/test split.
# NOTE(review): there is no set.seed() immediately before this sample()
# call, so the split depends on prior RNG state and is not reproducible
# on its own — confirm whether that is intended.
sub<-sample(nrow(schn), floor(nrow(schn)*0.75))
sch_train<-schn[sub, ]
sch_test<-schn[-sub, ]
# Fit DX on all predictors except column 3 (Sex); no `hidden` argument,
# so the neuralnet package default architecture is used.
m <- neuralnet::neuralnet(DX ~ ., data=sch_train[,-3])
plot(m)
# Predict on the held-out rows and report observed/predicted correlation.
sch_pred<-neuralnet::compute(m, sch_test[,-3])
pred_results<-sch_pred$net.result
cor(pred_results, sch_test$DX)
## [,1]
## [1,] 0.1493783
# Observed vs. predicted DX scatter for the baseline network `m`
# (fit above with neuralnet's default hidden layer, not hidden=4).
plot_ly() %>%
  add_markers(x=pred_results, y=sch_test$DX,
    name="Data Scatter", type="scatter", mode="markers") %>%
  add_trace(x = c(0,1), y = c(0,1), type="scatter", mode="lines",
    line = list(width = 4), name="Ideal Agreement") %>%
  layout(title=paste0('Scatterplot (Normalized) Observed vs. Predicted DX Values, Cor(Obs,Pred)=',
      round(cor(x=pred_results, y=sch_test$DX), 2)),
    # fixed labels: `m` uses the package-default hidden layer (hidden=1),
    # and the y-axis shows DX, not real-estate values (copy/paste error)
    xaxis = list(title="NN (hidden=1, default) DX Predictions"),
    yaxis = list(title="(Normalized) Observed DX"),
    legend = list(orientation = 'h'))
Try one hidden layer with different number of nodes
m2 <- neuralnet::neuralnet(DX ~ ., data=sch_train[,-3],hidden=2)
plot(m2)
sch_pred2<-neuralnet::compute(m2, sch_test[,-3])
pred_results2<-sch_pred2$net.result
cor(pred_results2, sch_test$DX)
## [,1]
## [1,] 0.391595
plot_ly() %>%
add_markers(x=pred_results2, y=sch_test$DX,
name="Data Scatter", type="scatter", mode="markers") %>%
add_trace(x = c(0,1), y = c(0,1), type="scatter", mode="lines",
line = list(width = 4), name="Ideal Agreement") %>%
layout(title=paste0('Scatterplot (Normalized) Observed vs. Predicted DX Values, Cor(Obs,Pred)=',
round(cor(pred_results2, sch_test$DX), 2)),
xaxis = list(title="NN (hidden=2) DX Predictions"),
yaxis = list(title="(Normalized) Observed DX"),
legend = list(orientation = 'h'))
m3 <- neuralnet::neuralnet(DX ~ ., data=sch_train[,-3],hidden=3)
plot(m3)
sch_pred3<-neuralnet::compute(m3, sch_test[,-3])
pred_results3<-sch_pred3$net.result
cor(pred_results3, sch_test$DX)
## [,1]
## [1,] -0.05391492
plot_ly() %>%
add_markers(x=pred_results3, y=sch_test$DX,
name="Data Scatter", type="scatter", mode="markers") %>%
add_trace(x = c(0,1), y = c(0,1), type="scatter", mode="lines",
line = list(width = 4), name="Ideal Agreement") %>%
layout(title=paste0('Scatterplot (Normalized) Observed vs. Predicted DX Values, Cor(Obs,Pred)=',
round(cor(pred_results3, sch_test$DX), 2)),
xaxis = list(title="NN (hidden=3) DX Predictions"),
yaxis = list(title="(Normalized) Observed DX"),
legend = list(orientation = 'h'))
m4 <- neuralnet::neuralnet(DX ~ ., data=sch_train[,-3],hidden=4)
plot(m4)
sch_pred4<-neuralnet::compute(m4, sch_test[,-3])
pred_results4<-sch_pred4$net.result
cor(pred_results4, sch_test$DX)
## [,1]
## [1,] 0.009721545
plot_ly() %>%
add_markers(x=pred_results4, y=sch_test$DX,
name="Data Scatter", type="scatter", mode="markers") %>%
add_trace(x = c(0,1), y = c(0,1), type="scatter", mode="lines",
line = list(width = 4), name="Ideal Agreement") %>%
layout(title=paste0('Scatterplot (Normalized) Observed vs. Predicted DX Values, Cor(Obs,Pred)=',
round(cor(pred_results4, sch_test$DX), 2)),
xaxis = list(title="NN (hidden=4) DX Predictions"),
yaxis = list(title="(Normalized) Observed DX"),
legend = list(orientation = 'h'))
Try multiple hidden layers and compare the results to the single layer. Which model is better?
m5 <- neuralnet::neuralnet(DX ~ ., data=sch_train[,-3],hidden=c(2,3,3))
plot(m5)
sch_pred5<-neuralnet::compute(m5, sch_test[,-3])
pred_results5<-sch_pred5$net.result
cor(pred_results5, sch_test$DX)
## [,1]
## [1,] 0.461141
plot_ly() %>%
add_markers(x=pred_results5, y=sch_test$DX,
name="Data Scatter", type="scatter", mode="markers") %>%
add_trace(x = c(0,1), y = c(0,1), type="scatter", mode="lines",
line = list(width = 4), name="Ideal Agreement") %>%
layout(title=paste0('Scatterplot (Normalized) Observed vs. Predicted DX Values, Cor(Obs,Pred)=',
round(cor(pred_results5, sch_test$DX), 2)),
xaxis = list(title="NN (hidden=(2,3,3)) DX Predictions"),
yaxis = list(title="(Normalized) Observed DX"),
legend = list(orientation = 'h'))
m6 <- neuralnet::neuralnet(DX ~ ., data=sch_train[,-3],hidden=c(3,2,3))
plot(m6)
sch_pred6<-neuralnet::compute(m6, sch_test[,-3])
pred_results6<-sch_pred6$net.result
cor(pred_results6, sch_test$DX)
## [,1]
## [1,] 0.5014888
plot_ly() %>%
add_markers(x=pred_results6, y=sch_test$DX,
name="Data Scatter", type="scatter", mode="markers") %>%
add_trace(x = c(0,1), y = c(0,1), type="scatter", mode="lines",
line = list(width = 4), name="Ideal Agreement") %>%
layout(title=paste0('Scatterplot (Normalized) Observed vs. Predicted DX Values, Cor(Obs,Pred)=',
round(cor(pred_results6, sch_test$DX), 2)),
xaxis = list(title="NN (hidden=(3,2,3)) DX Predictions"),
yaxis = list(title="(Normalized) Observed DX"),
legend = list(orientation = 'h'))
m7 <- neuralnet::neuralnet(DX ~ ., data=sch_train[,-3],hidden=c(3,3,2))
plot(m7)
sch_pred7<-neuralnet::compute(m7, sch_test[,-3])
pred_results7<-sch_pred7$net.result
cor(pred_results7, sch_test$DX)
## [,1]
## [1,] 0.6271454
plot_ly() %>%
add_markers(x=pred_results7, y=sch_test$DX,
name="Data Scatter", type="scatter", mode="markers") %>%
add_trace(x = c(0,1), y = c(0,1), type="scatter", mode="lines",
line = list(width = 4), name="Ideal Agreement") %>%
layout(title=paste0('Scatterplot (Normalized) Observed vs. Predicted DX Values, Cor(Obs,Pred)=',
round(cor(pred_results7, sch_test$DX), 2)),
xaxis = list(title="NN (hidden=(3,3,2)) DX Predictions"),
yaxis = list(title="(Normalized) Observed DX"),
legend = list(orientation = 'h'))
## Multiple hidden layers generally perform better here than a single layer.
Compare the type I (false-positive) and type II (false-negative) errors for the alternative methods
summary(as.factor(schn$DX))
## 0 1
## 32 31
# Reproducible 70/30 train/test split of the normalized data.
set.seed(2023)
train = sample(1:nrow(schn),0.7*nrow(schn))
schn_tr = schn[train,]
schn_ts = schn[-train,]
# NOTE(review): `[,-3]` drops only Sex (column 3); DX (column 2) is still
# among the predictors — the colnames() output below lists "DX". The
# networks fit on these features can therefore see the very label they
# predict, which explains the perfect confusion matrices further down
# (data leakage). This should likely be schn_tr[,-c(2,3)], with the same
# change wherever schn_ts[,-3] is used — confirm and re-run.
train_x = schn_tr[,-3]
train_y = schn_tr[,2]
colnames(train_x)
## [1] "Age" "DX"
## [3] "FS_IQ" "TBV"
## [5] "GMV" "WMV"
## [7] "CSF" "Background"
## [9] "L_superior_frontal_gyrus" "R_superior_frontal_gyrus"
## [11] "L_middle_frontal_gyrus" "R_middle_frontal_gyrus"
## [13] "L_inferior_frontal_gyrus" "R_inferior_frontal_gyrus"
## [15] "L_precentral_gyrus" "R_precentral_gyrus"
## [17] "L_middle_orbitofrontal_gyrus" "R_middle_orbitofrontal_gyrus"
## [19] "L_lateral_orbitofrontal_gyrus" "R_lateral_orbitofrontal_gyrus"
## [21] "L_gyrus_rectus" "R_gyrus_rectus"
## [23] "L_postcentral_gyrus" "R_postcentral_gyrus"
## [25] "L_superior_parietal_gyrus" "R_superior_parietal_gyrus"
## [27] "L_supramarginal_gyrus" "R_supramarginal_gyrus"
## [29] "L_angular_gyrus" "R_angular_gyrus"
## [31] "L_precuneus" "R_precuneus"
## [33] "L_superior_occipital_gyrus" "R_superior_occipital_gyrus"
## [35] "L_middle_occipital_gyrus" "R_middle_occipital_gyrus"
## [37] "L_inferior_occipital_gyrus" "R_inferior_occipital_gyrus"
## [39] "L_cuneus" "R_cuneus"
## [41] "L_superior_temporal_gyrus" "R_superior_temporal_gyrus"
## [43] "L_middle_temporal_gyrus" "R_middle_temporal_gyrus"
## [45] "L_inferior_temporal_gyrus" "R_inferior_temporal_gyrus"
## [47] "L_parahippocampal_gyrus" "R_parahippocampal_gyrus"
## [49] "L_lingual_gyrus" "R_lingual_gyrus"
## [51] "L_fusiform_gyrus" "R_fusiform_gyrus"
## [53] "L_insular_cortex" "R_insular_cortex"
## [55] "L_cingulate_gyrus" "R_cingulate_gyrus"
## [57] "L_caudate" "R_caudate"
## [59] "L_putamen" "R_putamen"
## [61] "L_hippocampus" "R_hippocampus"
## [63] "cerebellum" "brainstem"
test_x = schn_ts[,-3]
test_y = schn_ts[,2]
train_y_ind = model.matrix(~factor(train_y)-1)
colnames(train_y_ind) = c("Normals", "Schizophrenia")
train = cbind(train_x, train_y_ind)
nn_single = neuralnet::neuralnet(Normals+Schizophrenia~.,
data = train,
hidden=4,
linear.output=FALSE,
lifesign='full', lifesign.step=5000)
## hidden: 4 thresh: 0.01 rep: 1/1 steps: 32 error: 0.0119 time: 0.01 secs
nn_single2 = neuralnet::neuralnet(Normals+Schizophrenia~.,
data = train,
hidden=c(4,3),
linear.output=FALSE,
lifesign='full', lifesign.step=5000)
## hidden: 4, 3 thresh: 0.01 rep: 1/1 steps: 41 error: 0.01072 time: 0.01 secs
# Turn the two-column network output (Normals, Schizophrenia) into a 0/1
# class label by choosing the column with the larger activation.
pred <- function(nn, dat) {
  scores <- neuralnet::compute(nn, dat)$net.result
  apply(scores, 1, which.max) - 1
}
mean(pred(nn_single, schn_ts[,-3]) != as.factor(schn_ts[,2]))
## [1] 0
mean(pred(nn_single2, schn_ts[,-3]) != as.factor(schn_ts[,2]))
## [1] 0
table(pred(nn_single, schn_ts[,-3]), as.factor(schn_ts[,2]))
##
## 0 1
## 0 9 0
## 1 0 10
table(pred(nn_single2, schn_ts[,-3]), as.factor(schn_ts[,2]))
##
## 0 1
## 0 9 0
## 1 0 10
## Classification produced no false positives or false negatives for either the single-layer or the multi-layer network.
Train separate models to predict DX (diagnosis) for the Male and Female cohorts, respectively. Explain your findings.
male<-schn[schn$Sex==0,]
sub<-sample(nrow(male), floor(nrow(male)*0.75))
male_train<-male[sub, ]
male_test<-male[-sub, ]
male2 <- neuralnet::neuralnet(DX ~ ., data=male_train[,-3])
plot(male2)
m_pred<-neuralnet::compute(male2, male_test[,-3])
pred_results<-m_pred$net.result
cor(pred_results, male_test$DX)
## [,1]
## [1,] 0.5140658
train_y_ind = model.matrix(~factor(male_train$DX)-1)
colnames(train_y_ind) = c("Normals", "Schizophrenia")
train = cbind(male_train[,-3], train_y_ind)
m_single = neuralnet::neuralnet(Normals+Schizophrenia~.,
data = train,
hidden=4,
linear.output=FALSE,
lifesign='full', lifesign.step=5000)
## hidden: 4 thresh: 0.01 rep: 1/1 steps: 34 error: 0.01689 time: 0.01 secs
table(as.factor(male_test[,2]),pred(m_single, male_test[,-3]))
##
## 0 1
## 0 4 0
## 1 2 3
fmale<-schn[schn$Sex==1,]
sub<-sample(nrow(fmale), floor(nrow(fmale)*0.75))
fmale_train<-fmale[sub, ]
fmale_test<-fmale[-sub, ]
fmale2 <- neuralnet::neuralnet(DX ~ ., data=fmale_train[,-3])
plot(fmale2)
f_pred<-neuralnet::compute(fmale2, fmale_test[,-3])
pred_results<-f_pred$net.result
cor(pred_results, fmale_test$DX)
## [,1]
## [1,] 0.7493451
train_y_ind = model.matrix(~factor(fmale_train$DX)-1)
colnames(train_y_ind) = c("Normals", "Schizophrenia")
train = cbind(fmale_train[,-3], train_y_ind)
f_single = neuralnet::neuralnet(Normals+Schizophrenia~.,
data = train,
hidden=4,
linear.output=FALSE,
lifesign='full', lifesign.step=5000)
## hidden: 4 thresh: 0.01 rep: 1/1 steps: 32 error: 0.01305 time: 0.01 secs
table(as.factor(fmale_test[,2]),pred(f_single, fmale_test[,-3]))
##
## 0 1
## 0 3 0
## 1 1 3
## The female cohort shows a higher prediction correlation than the male cohort. The male cohort also has a higher false-negative rate: male patients who have schizophrenia are classified as normal.
Train an SVM (using ksvm and
svm in e1071) for Age,
FS_IQ, TBV, GMV, WMV, CSF,
Background to predict DX. Compare the results of
linear, Gaussian and polynomial SVM kernels
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
set.seed(2023)
sch_classifier <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:9)], kernel = "vanilladot")
## Setting default kernel parameters
sch_predictions <- predict(sch_classifier, sch_test[,c(1:2,4:9)])
agreement <- sch_predictions == sch_test$DX
table(agreement)
## agreement
## FALSE TRUE
## 4 12
prop.table(table(agreement))
## agreement
## FALSE TRUE
## 0.25 0.75
sch_classifier2 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:9)], kernel = "rbfdot")
sch_predictions2 <- predict(sch_classifier2, sch_test[,c(1:2,4:9)])
agreement2 <- sch_predictions2 == sch_test$DX
table(agreement2)
## agreement2
## FALSE TRUE
## 3 13
prop.table(table(agreement2))
## agreement2
## FALSE TRUE
## 0.1875 0.8125
sch_classifier3 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:9)], kernel = "polydot")
## Setting default kernel parameters
sch_predictions3 <- predict(sch_classifier3, sch_test[,c(1:2,4:9)])
agreement3 <- sch_predictions3 == sch_test$DX
table(agreement3)
## agreement3
## FALSE TRUE
## 4 12
prop.table(table(agreement3))
## agreement3
## FALSE TRUE
## 0.25 0.75
library(e1071)
sch.svm_1 <- svm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:9)],
kernel="linear", cost=1)
table(sch_test$DX, predict(sch.svm_1, sch_test[,c(1:2,4:9)]))
##
## 0 1
## 0 6 3
## 1 1 6
sch.svm_2 <- svm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:9)],
kernel="radial", cost=1)
table(sch_test$DX, predict(sch.svm_2, sch_test[,c(1:2,4:9)]))
##
## 0 1
## 0 7 2
## 1 1 6
Add Sex to your models and see if this makes a difference
sch_classifier4 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:9)], kernel = "vanilladot")
## Setting default kernel parameters
sch_predictions4 <- predict(sch_classifier4, sch_test[,c(1:9)])
agreement4 <- sch_predictions4 == sch_test$DX
table(agreement4)
## agreement4
## FALSE TRUE
## 4 12
prop.table(table(agreement4))
## agreement4
## FALSE TRUE
## 0.25 0.75
sch_classifier5 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:9)], kernel = "rbfdot")
sch_predictions5 <- predict(sch_classifier5, sch_test[,c(1:9)])
agreement5 <- sch_predictions5 == sch_test$DX
table(agreement5)
## agreement5
## FALSE TRUE
## 3 13
prop.table(table(agreement5))
## agreement5
## FALSE TRUE
## 0.1875 0.8125
sch_classifier6 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:9)], kernel = "polydot")
## Setting default kernel parameters
sch_predictions6 <- predict(sch_classifier6, sch_test[,c(1:9)])
agreement6 <- sch_predictions6 == sch_test$DX
table(agreement6)
## agreement6
## FALSE TRUE
## 4 12
prop.table(table(agreement6))
## agreement6
## FALSE TRUE
## 0.25 0.75
sch.svm_3 <- svm(as.factor(DX) ~ ., data = sch_train[,c(1:9)],
kernel="linear", cost=1)
table(sch_test$DX, predict(sch.svm_3, sch_test[,c(1:9)]))
##
## 0 1
## 0 6 3
## 1 1 6
sch.svm_4 <- svm(as.factor(DX) ~ ., data = sch_train[,c(1:9)],
kernel="radial", cost=1)
table(sch_test$DX, predict(sch.svm_4, sch_test[,c(1:9)]))
##
## 0 1
## 0 7 2
## 1 1 6
## Adding Sex produced essentially the same output as in the previous question; thus, Sex did not make a difference.
Expand the model by training on all derived neuroimaging biomarkers and re-train the SVM using Age, FS_IQ, TBV, GMV, WMV, CSF, Background, L_superior_frontal_gyrus, R_superior_frontal_gyrus, …, brainstem. Again, try linear, Gaussian and polynomial kernels. Compare the results
sch_classifier7 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:65)], kernel = "vanilladot")
## Setting default kernel parameters
sch_predictions7 <- predict(sch_classifier7, sch_test[,c(1:2,4:65)])
agreement7 <- sch_predictions7 == sch_test$DX
table(agreement7)
## agreement7
## FALSE TRUE
## 4 12
prop.table(table(agreement7))
## agreement7
## FALSE TRUE
## 0.25 0.75
sch_classifier8 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:65)], kernel = "rbfdot")
sch_predictions8 <- predict(sch_classifier8, sch_test[,c(1:2,4:65)])
agreement8 <- sch_predictions8 == sch_test$DX
table(agreement8)
## agreement8
## FALSE TRUE
## 4 12
prop.table(table(agreement8))
## agreement8
## FALSE TRUE
## 0.25 0.75
sch_classifier9 <- ksvm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:65)], kernel = "polydot")
## Setting default kernel parameters
sch_predictions9 <- predict(sch_classifier9, sch_test[,c(1:2,4:65)])
agreement9 <- sch_predictions9 == sch_test$DX
table(agreement9)
## agreement9
## FALSE TRUE
## 4 12
prop.table(table(agreement9))
## agreement9
## FALSE TRUE
## 0.25 0.75
sch.svm_5 <- svm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:65)],
kernel="linear", cost=1)
table(sch_test$DX, predict(sch.svm_5, sch_test[,c(1:2,4:65)]))
##
## 0 1
## 0 6 3
## 1 1 6
sch.svm_6 <- svm(as.factor(DX) ~ ., data = sch_train[,c(1:2,4:65)],
kernel="radial", cost=1)
table(sch_test$DX, predict(sch.svm_6, sch_test[,c(1:2,4:65)]))
##
## 0 1
## 0 6 3
## 1 1 6
Are there differences between the alternative kernels?
## Yes, there are differences between the linear and radial kernels. With fewer variables in the model, the linear and radial kernels produced different confusion matrices; as more variables were included, both kernels yielded identical matrices.
For Age, FS_IQ, TBV, GMV, WMV, CSF, and Background, tune parameters for Gaussian and polynomial kernels
tune_rbf = tune.svm(as.factor(DX) ~., kernel = "radial",data = sch_train[,c(1:2,4:9)],cost = exp(-5:8))
tune_rbf
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 2.718282
##
## - best performance: 0.26
tune_poly = tune.svm(as.factor(DX) ~., kernel = "polynomial", data = sch_train[,c(1:2,4:9)],cost = exp(-5:8))
tune_poly
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 20.08554
##
## - best performance: 0.19
Draw a CV (cross-validation) plot and interpret the resulting graph
library(sparsediscrim);library (reshape); library(ggplot2)
## Warning: package 'sparsediscrim' was built under R version 4.2.3
## Warning: package 'reshape' was built under R version 4.2.3
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:reshape2':
##
## colsplit, melt, recast
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
## The following object is masked from 'package:plotly':
##
## rename
sub<-sample(nrow(sch), floor(nrow(sch)*0.75))
sch_train<-sch[sub, ]
sch_test<-sch[-sub, ]
folds = cv_partition(sch$DX, num_folds = 5)
# Profile a radial-kernel SVM at cost `costC`: returns
# c(train_error, cv_error, test_error) as misclassification rates.
# Reads `sch`, `sch_test`, and the pre-computed `folds` from the
# enclosing environment.
train_cv_error_svm <- function(costC) {
  # Training error: fit on the full data set
  fit_all <- svm(as.factor(DX) ~ ., data = sch,
                 kernel = "radial", cost = costC)
  err_train <- mean(fit_all$fitted != sch$DX)
  # Held-out test error
  err_test <- mean(predict(fit_all, sch_test) != sch_test$DX)
  # Cross-validation error, averaged over the folds
  fold_errs <- vapply(folds, function(fold) {
    fit_cv <- svm(as.factor(DX) ~ ., data = sch, kernel = "radial",
                  cost = costC, subset = fold$training)
    mean(predict(fit_cv, sch[fold$test, ]) != sch$DX[fold$test])
  }, numeric(1))
  c(err_train, mean(fold_errs), err_test)
}
costs = exp(-5:8)
ir_cost_errors = sapply(costs, function(cost) train_cv_error_svm(cost))
df_errs = data.frame(t(ir_cost_errors), costs)
colnames(df_errs) = c('Train', 'CV', 'Test', 'Logcost')
dataL <- melt(df_errs, id="Logcost")
library(plotly)
plot_ly(dataL, x = ~log(Logcost), y = ~value, color = ~variable,
colors = c('blue', 'red', "green"), type="scatter", mode="lines") %>%
layout(xaxis = list(title = 'log(Cost)'), yaxis = list(title = 'Classifier Error'), legend = list(orientation = 'h'),
title="SVM CV-Plot of Model Performance (5 folds)") %>% hide_colorbar()
## 10 folds
folds = cv_partition(sch$DX, num_folds = 10)
# Re-definition of the error-profiling helper for the 10-fold partition.
# Returns c(train_error, cv_error, test_error) for a radial-kernel SVM at
# cost `costC`. NOTE(review): it reads `folds`, `sch`, and `sch_test` from
# the global environment, so it must be redefined whenever `folds` changes;
# passing them as arguments would make this one reusable function.
train_cv_error_svm = function(costC) {
# Training error on the full data set
sch.svm = svm(as.factor(DX)~., data=sch,
kernel="radial", cost=costC)
train_error = sum(sch.svm$fitted != sch$DX) / nrow(sch)
# Held-out test error
test_error = sum(predict(sch.svm, sch_test) != sch_test$DX) / nrow(sch_test)
#CV error
ire.cverr = sapply(folds, function(fold) {
svmcv = svm(as.factor(DX)~.,data = sch, kernel="radial", cost=costC, subset = fold$training)
svmpred = predict(svmcv, sch[fold$test,])
return(sum(svmpred != sch$DX[fold$test]) / length(fold$test))
})
cv_error = mean(ire.cverr)
return(c(train_error, cv_error, test_error))
}
costs = exp(-5:8)
ir_cost_errors = sapply(costs, function(cost) train_cv_error_svm(cost))
df_errs = data.frame(t(ir_cost_errors), costs)
colnames(df_errs) = c('Train', 'CV', 'Test', 'Logcost')
dataL <- melt(df_errs, id="Logcost")
plot_ly(dataL, x = ~log(Logcost), y = ~value, color = ~variable,
colors = c('blue', 'red', "green"), type="scatter", mode="lines") %>%
layout(xaxis = list(title = 'log(Cost)'), yaxis = list(title = 'Classifier Error'), legend = list(orientation = 'h'),
title="SVM CV-Plot of Model Performance (10 folds)") %>% hide_colorbar()
## Classifier error begins to decrease around log(cost) = -2. All three curves reach their lowest error and stabilize around log(cost) = 5; the training and test errors stabilize near 0, while the CV error stabilizes around 0.25.
Use a different random seed and repeat the experiment; are the results stable?
set.seed(1234)
## 5 folds
sub<-sample(nrow(sch), floor(nrow(sch)*0.75))
sch_train<-sch[sub, ]
sch_test<-sch[-sub, ]
folds = cv_partition(sch$DX, num_folds = 5)
# Re-definition of the error-profiling helper for the new seed (5 folds).
# Returns c(train_error, cv_error, test_error) for a radial-kernel SVM at
# cost `costC`. NOTE(review): reads `folds`, `sch`, and `sch_test` from the
# global environment; the body is identical to the earlier definitions and
# only exists because `folds` changed — parameterizing `folds` would avoid
# the duplication.
train_cv_error_svm = function(costC) {
#Train
ir.svm = svm(as.factor(DX)~., data=sch,
kernel="radial", cost=costC)
train_error = sum(ir.svm$fitted != sch$DX) / nrow(sch)
#Test
test_error = sum(predict(ir.svm, sch_test) != sch_test$DX) / nrow(sch_test)
#CV error
ire.cverr = sapply(folds, function(fold) {
svmcv = svm(as.factor(DX)~.,data = sch, kernel="radial", cost=costC, subset = fold$training)
svmpred = predict(svmcv, sch[fold$test,])
return(sum(svmpred != sch$DX[fold$test]) / length(fold$test))
})
cv_error = mean(ire.cverr)
return(c(train_error, cv_error, test_error))
}
costs = exp(-5:8)
ir_cost_errors = sapply(costs, function(cost) train_cv_error_svm(cost))
df_errs = data.frame(t(ir_cost_errors), costs)
colnames(df_errs) = c('Train', 'CV', 'Test', 'Logcost')
dataL <- melt(df_errs, id="Logcost")
library(plotly)
plot_ly(dataL, x = ~log(Logcost), y = ~value, color = ~variable,
colors = c('blue', 'red', "green"), type="scatter", mode="lines") %>%
layout(xaxis = list(title = 'log(Cost)'), yaxis = list(title = 'Classifier Error'), legend = list(orientation = 'h'),
title="SVM CV-Plot of Model Performance (5 folds)") %>% hide_colorbar()
## 10 folds
folds = cv_partition(sch$DX, num_folds = 10)
# Re-definition of the error-profiling helper for the new seed (10 folds).
# Returns c(train_error, cv_error, test_error) for a radial-kernel SVM at
# cost `costC`. NOTE(review): same global-environment coupling as the
# earlier copies (`folds`, `sch`, `sch_test`); redefined only because
# `folds` changed.
train_cv_error_svm = function(costC) {
# Training error on the full data set
sch.svm = svm(as.factor(DX)~., data=sch,
kernel="radial", cost=costC)
train_error = sum(sch.svm$fitted != sch$DX) / nrow(sch)
# Held-out test error
test_error = sum(predict(sch.svm, sch_test) != sch_test$DX) / nrow(sch_test)
#CV error
ire.cverr = sapply(folds, function(fold) {
svmcv = svm(as.factor(DX)~.,data = sch, kernel="radial", cost=costC, subset = fold$training)
svmpred = predict(svmcv, sch[fold$test,])
return(sum(svmpred != sch$DX[fold$test]) / length(fold$test))
})
cv_error = mean(ire.cverr)
return(c(train_error, cv_error, test_error))
}
costs = exp(-5:8)
ir_cost_errors = sapply(costs, function(cost) train_cv_error_svm(cost))
df_errs = data.frame(t(ir_cost_errors), costs)
colnames(df_errs) = c('Train', 'CV', 'Test', 'Logcost')
dataL <- melt(df_errs, id="Logcost")
plot_ly(dataL, x = ~log(Logcost), y = ~value, color = ~variable,
colors = c('blue', 'red', "green"), type="scatter", mode="lines") %>%
layout(xaxis = list(title = 'log(Cost)'), yaxis = list(title = 'Classifier Error'), legend = list(orientation = 'h'),
title="SVM CV-Plot of Model Performance (10 folds)") %>% hide_colorbar()
### With 10 folds, the CV error is roughly 0.5 for both seeds, although the overall trend remains similar to the previous seed. With 5 folds the results are essentially identical across seeds. Overall, the results are stable with 5 folds and remain reasonably stable with 10 folds.
Inspecting the results above, explain why it makes sense to tune over a range such as \(\exp(-5:8)\).
# Tuning over exp(-5:8) spans many orders of magnitude on the log scale: it captures the regime where the cost is close to 0 (high, unstable error) as well as large costs where the classification error has stabilized. How can we design alternative tuning strategies other than greedy (grid) search?
gammas = exp(-5:5)
tune_g = tune.svm(as.factor(DX)~., data = as.data.frame(sch_train), cost = costs, gamma = gammas)
tune_g
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma cost
## 0.006737947 20.08554
##
## - best performance: 0.105
sch.svm_g = svm(as.factor(DX)~., kernel = "radial", data = as.data.frame(sch_train), gamma = 0.04978707, cost = 1)
sch.svm_g
##
## Call:
## svm(formula = as.factor(DX) ~ ., data = as.data.frame(sch_train),
## kernel = "radial", gamma = 0.04978707, cost = 1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 43
table(sch_test$DX, predict(sch.svm_g , sch_test))
##
## 1 2
## 1 4 3
## 2 3 6
agreement<-predict(sch.svm_g, sch_test)==sch_test$DX
prop.table(table(agreement))
## agreement
## FALSE TRUE
## 0.375 0.625
##We could further tune the parameter through the inclusion of gammas.